#-----------------------------------------------------------#
# DoS text files obtained from: http://cfariss.com/#contact #
#-----------------------------------------------------------#

# Session setup: start from a clean environment, fix the RNG seed, make sure
# every required package is installed and attached, then configure the
# working directory and the parallel backend.
#
# NOTE(review): wiping the global environment is kept so the script still
# starts from a clean slate; restarting R before running is the safer way
# to achieve the same thing.
rm(list = ls(all.names = TRUE))
gc()
options(scipen = 999)  # strongly prefer fixed over scientific notation
set.seed(2107)

# CRAN packages attached by this script.
packages <- c("dplyr", "tidyverse", "readtext", "foreign", "furrr",
              "tictoc", "tidytext", "udpipe", "lattice", "devtools")

# Install only the packages that are not installed yet.
new.packages <- setdiff(packages, installed.packages()[, "Package"])
if (length(new.packages) > 0) {
  install.packages(new.packages)
}

# library() stops with an error when a package cannot be attached, whereas
# require() merely returns FALSE and lets the script fail later on.
for (pkg in packages) {
  library(pkg, character.only = TRUE)
}
rm(packages, new.packages, pkg)

# rbow is installed from GitHub; like the CRAN packages above, install it
# only when it is missing so that re-running the script needs no network.
if (!requireNamespace("rbow", quietly = TRUE)) {
  devtools::install_github("till-tietz/rbow")
}
library(rbow)

# Replace the placeholder with the replication folder; the data path used
# later in the script is relative to this directory.
setwd("PUT YOUR DIRECTORY HERE")

# to speed things up:
future::plan(future::multisession)
# raise the size limit for globals exported to the workers
options(future.globals.maxSize = 800000000)

#--------------#
# Read in data #
#--------------#

# Read the cleaned + stemmed text csv.
# You can select specific texts by filtering for country or year.
# any_of() drops the "X" column (presumably a row index written along with
# the csv -- TODO confirm) only when it exists, so a file saved without it
# no longer makes select() fail.
text <- read.csv("./Figures/FigureA4/dos_stemmed_3.csv") %>%
  dplyr::select(-dplyr::any_of("X"))

# focus on Poland; %in% never returns NA, so rows with a missing country
# code are dropped rather than turned into all-NA rows
texts <- text[text$country %in% "POL", ]
#texts <- texts[which(texts$year<1990),]

# Fail early with a clear message instead of passing an empty corpus on.
if (nrow(texts) == 0) {
  stop("No rows with country == \"POL\" in dos_stemmed_3.csv", call. = FALSE)
}

# turn text into a list of character vectors, one vector of space-separated
# tokens per row (the text is taken from the first remaining column)
text_list <- strsplit(as.character(texts[, 1]), " ")

# Pool the tokens of all selected texts into one set (i.e. get a joint bow
# analysis for every text kept by the filter above) and wrap the pooled
# vector in a named list, which is the form handed to bow_analysis.
combined_texts <- list(text = unlist(text_list, use.names = FALSE))

#---------------------------------------------------#
# Define dictionaries for phenomena and descriptors #
#---------------------------------------------------#

# Phenomena dictionary: one named character vector of search terms per
# phenomenon, all collected in a single list. The terms are handed to rbow
# as written, including the trailing "*" on some of them.
# NOTE(review): "execut*" (Killings) and "punish*" (Torture) are each listed
# twice. They are kept as-is because dropping them might change the term
# bootstrap -- confirm against rbow before de-duplicating.
phenomena <- list(
  Surveillance = c(
    "spy*", "investig*", "search*", "surveil*", "agent", "espionage",
    "infiltr*", "observ*", "intelligence", "watch", "patrol",
    "scrutini*", "monitor*", "inspect*", "inquir*"
  ),
  Killings = c(
    "extrajudicial", "killing", "murder", "execut*",
    "assassin*", "execut*"
  ),
  Disappearances = c("disappear*", "vanish*"),
  Torture = c(
    "torture", "punish*", "abuse*",
    "punish*", "mistreat*", "persecut*"
  ),
  Detention = c(
    "detain", "abduct*", "arrest*",
    "intern*", "confine*", "incarcerate*"
  ),
  Justice = c("trial", "justice", "judge")
)

# Descriptors dictionary for secrecy, permanence and comprehensiveness:
# one named character vector of search terms per descriptor, all collected
# in a single list.
descriptors_dict <- list(
  secretive = c(
    "hide", "clandestin*", "conceal*", "unseen", "camouflag*",
    "confidential", "cover", "veil",
    "disguis*", "unknown*", "secret*", "unrevealed",
    "mask*", "invisible", "inconspicuous", "unobtrusive"
  ),
  continuous = c(
    "continu*", "indefinit*", "persist*", "relentless", "unrelenting",
    "lasting", "enduring", "everlast*", "arrang*"
  ),
  comprehensive = c(
    "thorough", "extens*", "encompass", "broad*", "vast*", "wide*",
    "exhaustive", "substanti*", "prolong*", "length*", "long",
    "unavoid*", "inelud*", "inescap*"
  )
)


#-----------------------------------------------#
# phenomenon in context of descriptors analysis #
#-----------------------------------------------#

# Analyze the frequency of descriptor terms within the context of phenomena
# terms. The methodology (code + descriptions) is documented at
# https://github.com/till-tietz/rbow

# Settings passed to both the analysis and the bootstrap call below; they
# are defined once so the two calls always receive the same values.
context_window <- 10
count_per_occurrence <- TRUE

bow_analysis <- rbow::bow_analysis(
  corpus = combined_texts,
  phenomenon = phenomena,
  descriptors = descriptors_dict,
  window = context_window,
  per_occurrence = count_per_occurrence,
  own_regex = FALSE
)

# Construct CIs from 1000 bootstraps of the analysis output.
cis <- rbow::bow_ci(
  bow_analysis_output = bow_analysis,
  bootstraps = 1000,
  alpha = 0.95,
  window = context_window,
  per_occurrence = count_per_occurrence,
  bootstrap_terms = TRUE
)


# Plot results: turn the bootstrap output into plotting data and draw the
# confidence-interval plot from its first element.
plot_data <- rbow::create_plot_data(bstrap_output = cis)
plot <- rbow::ci_plot(plot_data = plot_data[[1]], scale = "colour")

# Final figure: legend below the panel, serif font, and all titles removed
# (`title` is the parent theme element of the plot, axis and legend titles).
fig_a4 <- plot +
  ggplot2::theme(
    legend.position = "bottom",
    text = ggplot2::element_text(family = "serif"),
    title = ggplot2::element_blank()
  )

# Print explicitly so the figure is also shown when the script is source()d,
# where top-level values are not auto-printed.
print(fig_a4)

# Pass the plot explicitly instead of relying on ggplot2's "last plot"
# global state; width and height use ggsave's default unit.
ggplot2::ggsave("fig_A4.pdf", plot = fig_a4, width = 7, height = 7)


